{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "This notebook replicates Table 2.\n",
    "It requires invent_location.csv and all of the controls files.\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample periods: period i runs from starting_year[i] to end_year[i].\n",
    "# Both values appear in the names of the controls files.\n",
    "starting_year = [1976, 1986, 1996, 2006]\n",
    "end_year = [first + 9 for first in starting_year]\n",
    "\n",
    "# Location table, joined to patents through its 'wku' column; the file is '*'-delimited.\n",
    "LOC = pd.read_csv('invent_location.csv', delimiter='*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column headers of the table; entry k labels the results from the controls<k+1> files.\n",
    "Z = [\n",
    "    '3-digit',\n",
    "    'Any',\n",
    "    'Primary',\n",
    "    'Common',\n",
    "]\n",
    "\n",
    "# Number of decimal places kept when rounding the reported percentages.\n",
    "r = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Geographic levels reported for every period: (row label, column prefix).\n",
    "LEVELS = [('country', 'cnt'), ('state', 'sta'), ('CMSA', 'cmsa')]\n",
    "\n",
    "\n",
    "def attach_location(pairs, patent_col, role):\n",
    "    \"\"\"Inner-join LOC onto `pairs` using the patent number in `patent_col`.\n",
    "\n",
    "    The non-key columns of LOC are renamed, in their existing order, to\n",
    "    cnt_<role>, sta_<role> and cmsa_<role>; the 'wku' key is dropped.\n",
    "    Rows whose patent is missing from LOC are discarded by the inner join.\n",
    "\n",
    "    Raises ValueError if LOC does not hold exactly three non-key columns.\n",
    "    \"\"\"\n",
    "    loc_cols = [col for col in LOC.columns if col != 'wku']\n",
    "    if len(loc_cols) != len(LEVELS):\n",
    "        raise ValueError('LOC must hold wku plus three location columns, got ' + str(list(LOC.columns)))\n",
    "    renamed = LOC.rename(columns={old: prefix + '_' + role for old, (_, prefix) in zip(loc_cols, LEVELS)})\n",
    "    merged = pd.merge(pairs, renamed, left_on=patent_col, right_on='wku', how='inner')\n",
    "    return merged.drop('wku', axis=1)\n",
    "\n",
    "\n",
    "def match_rates(frame, role):\n",
    "    \"\"\"Percentage of rows, per level, where the cited and `role` patents share a location.\n",
    "\n",
    "    Returns a dict keyed by level label; every value is NaN for an empty frame.\n",
    "    \"\"\"\n",
    "    total = len(frame)\n",
    "    rates = {}\n",
    "    for label, prefix in LEVELS:\n",
    "        if total == 0:\n",
    "            rates[label] = float('nan')\n",
    "        else:\n",
    "            same_place = frame[prefix + '_cited'] == frame[prefix + '_' + role]\n",
    "            rates[label] = 100 * int(same_place.sum()) / total\n",
    "    return rates\n",
    "\n",
    "\n",
    "def excess_share(citing_rate, control_rate):\n",
    "    \"\"\"Format 100 * (citing - control) / citing, rounded to `r` decimals, with a '%' suffix.\n",
    "\n",
    "    Yields 'nan%' when the citing rate is zero or undefined, so one empty or\n",
    "    degenerate sample does not abort the whole table with a ZeroDivisionError.\n",
    "    \"\"\"\n",
    "    if pd.isna(citing_rate) or citing_rate == 0:\n",
    "        return 'nan%'\n",
    "    return str(round((citing_rate - control_rate) * 100 / citing_rate, r)) + '%'\n",
    "\n",
    "\n",
    "period_blocks = []\n",
    "for x, y in zip(starting_year, end_year):\n",
    "    # The first column is named 'year' but holds the geographic level of each row.\n",
    "    cells = {'year': [label for label, _ in LEVELS]}\n",
    "    citing_rates = None\n",
    "\n",
    "    for z, method in enumerate(Z, start=1):\n",
    "        file_in = 'controls' + str(z) + '_' + str(x) + '_' + str(y) + '.csv'\n",
    "        www = pd.read_csv(file_in, sep=',')[['cited', 'citing', 'citing_control', 'period']]\n",
    "        www = www.rename(columns={'citing_control': 'control'})\n",
    "\n",
    "        # Keep US cited patents and rows with period > 0 before locating the other two patents.\n",
    "        www = attach_location(www, 'cited', 'cited')\n",
    "        www = www[(www.cnt_cited == 'US') & (www.period > 0)]\n",
    "        www = attach_location(www, 'citing', 'citing')\n",
    "        www = attach_location(www, 'control', 'control')\n",
    "\n",
    "        # NOTE(review): the citing match rates come from the first controls file\n",
    "        # of each period and are reused as the baseline for the other columns,\n",
    "        # exactly as before -- confirm this is the intended denominator.\n",
    "        if citing_rates is None:\n",
    "            citing_rates = match_rates(www, 'citing')\n",
    "        control_rates = match_rates(www, 'control')\n",
    "\n",
    "        cells[method] = [excess_share(citing_rates[label], control_rates[label]) for label, _ in LEVELS]\n",
    "\n",
    "    # Only the first row of a period carries the starting year; the blank labels\n",
    "    # keep the index unique inside the block.\n",
    "    result = pd.DataFrame(cells, index=[str(x), ' ', '  '])\n",
    "    period_blocks.append(result)\n",
    "\n",
    "# Build the table with a single concat instead of growing it from an empty frame.\n",
    "Table = pd.concat(period_blocks, axis=0) if period_blocks else pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the assembled table (all sample periods stacked).\n",
    "Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Export the complete table; `result` only holds the rows of the last period.\n",
    "print(Table.to_latex())"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
